//modified from code by David Eberly[eberly@magic-software.com]
//original code comes from C Recipe
#pragma once
#include "Math.hpp"
#include "Vector.hpp"
#include "Utility/Log.hpp"
namespace zzz{
#define SIGN(a, b) ((b)<0 ? -fabs(a) : fabs(a))

template<typename T, int N>
class SymEigenSystem
{
private:
  void tred2(T a[N+1][N+1], T d[N+1], T e[N+1])
  {
    for (int i=N; i>=2; i--) {
      int l=i-1;
      T h=0;
      T scale=0;
      if (l > 1) {
        for (int k=1; k<=l; k++) scale += Abs<T>(a[i][k]);
        if (scale == 0.0) {
          e[i]=a[i][l];
        } else {
          for (int k=1; k<=l; k++) {
            a[i][k] /= scale;
            h += a[i][k]*a[i][k];
          }
          T f=a[i][l];
          T g = f>0 ? -Sqrt<T>(h) : Sqrt<T>(h);
          e[i]=scale*g;
          h -= f*g;
          a[i][l]=f-g;
          f=0;
          for (int j=1; j<=l; j++) {
            // Next statement can be omitted if eigenvectors not wanted
            a[j][i]=a[i][j]/h;
            g=0;
            for (int k=1; k<=j; k++) g += a[j][k]*a[i][k];
            for (int k=j+1; k<=l; k++) g += a[k][j]*a[i][k];
            e[j]=g/h;
            f += e[j]*a[i][j];
          }
          T hh=f/(h+h);
          for (int j=1; j<=l; j++) {
            f=a[i][j];
            g=e[j]=e[j]-hh*f;
            for (int k=1; k<=j; k++) 
              a[j][k] -= (f*e[k]+g*a[i][k]);
          }
        }
      } else {
        e[i]=a[i][l];
      }
      d[i]=h;
    }
    // Next statement can be omitted if eigenvectors not wanted
    d[1]=0.0;
    e[1]=0.0;
    // Contents of this loop can be omitted if eigenvectors not
    //  wanted except for statement d[i]=a[i][i]; 
    for (int i=1; i<=N; i++) {
      int l=i-1;
      if (d[i]) {
        for (int j=1; j<=l; j++) {
          T g=0.0;
          for (int k=1; k<=l; k++)
            g += a[i][k]*a[k][j];
          for (int k=1; k<=l; k++)
            a[k][j] -= g*a[k][i];
        }
      }
      d[i]=a[i][i];
      a[i][i]=1.0;
      for (int j=1; j<=l; j++) 
        a[j][i]=a[i][j]=0.0;
    }
  }


  void tqli(T d[N+1], T e[N+1], T z[N+1][N+1])
  {
    for (int i=2; i<=N; i++) e[i-1]=e[i];
    e[N]=0.0;
    for (int l=1; l<=N; l++) {
      int iter=0;
      int Mat;
      do {
        for (Mat=l;Mat<=N-1;Mat++) {
          T dd=Abs<T>(d[Mat])+Abs<T>(d[Mat+1]);
          if (fabs(e[Mat])+dd == dd) break;
        }
        if (Mat != l) {
          if (iter++ == 30) {
            ZLOGI << "Too many iterations in TQLI" << endl;
            return;
          }
          T g=(d[l+1]-d[l])/(2.0*e[l]);
          T r=Sqrt<T>((g*g)+1.0);
          g=d[Mat]-d[l]+e[l]/(g + (g<0?-Abs<T>(r):Abs<T>(r)));
          T s=1;
          T c=1;
          T p=0;
          for (int i=Mat-1; i>=l; i--) {
            T f=s*e[i];
            T b=c*e[i];
            if (fabs(f) >= fabs(g)) {
              c=g/f;
              r=(float)(sqrt((c*c)+1.0));
              e[i+1]=f*r;
              c *= (T)(s=(T)1.0/r);
            } else {
              s=f/g;
              r=(float)sqrt((s*s)+1.0);
              e[i+1]=g*r;
              s *= (T)(c=(T)1.0/r);
            }
            g=d[i+1]-p;
            r=(T)((d[i]-g)*s+2.0*c*b);
            p=s*r;
            d[i+1]=g+p;
            g=c*r-b;
            // Next loop can be omitted if eigenvectors not wanted 
            for (int k=1; k<=N; k++) {
              f=z[k][i+1];
              z[k][i+1]=s*z[k][i]+c*f;
              z[k][i]=c*z[k][i]-s*f;
            }
          }
          d[l]=d[l]-p;
          e[l]=g;
          e[Mat]=0.0;
        }
      } while (Mat != l);
    }

    T max_val;
    int max_index;
    for(int i=1; i<=N-1; i++) {
      max_val = d[i];
      max_index = i;
      for(int k=i+1; k<=N; k++) {
        if (max_val < d[k]) {
          max_val = d[k]; 
          max_index = k; 
        }
        if (max_index != i) {
          e[1] = d[i]; d[i] = d[max_index]; d[max_index] = e[1];
          for(int l=1; l<=N; l++) {
            e[1] = z[l][i]; z[l][i] = z[l][max_index]; z[l][max_index] = e[1];
          }
        }
      }
    }
  }

  void Decompose(T **Mat, T **EigenVector, T *EigenValue)
  {
    T a[N+1][N+1];
    T d[N+1], e[N+1];  // data structure for tred2 and tqli - don't ask

    for(int i=0; i<N; i++) for(int j=0; j<N; j++) 
      a[i+1][j+1] = Mat[i][j];

    tred2(a, d, e);
    tqli(d, e, a);

    for (int i=0; i<N; i++) {
      EigenValue[i] = d[i+1];
      for (int j=0; j<N; j++)
        EigenVector[j][i] = a[i+1][j+1];   // order should be swapped
    }
  }

public:
  void Decompose(const MatrixBase<N, N, T>& Mat, MatrixBase<N, N, T>& EigenVector, Vector<N, T>& EigenValue)
  {
    T a[N+1][N+1];
    T d[N+1], e[N+1];  // data structure for tred2 and tqli - don't ask

    for(int i=0; i<N; i++) for(int j=0; j<N; j++) 
      a[i+1][j+1] = Mat.At(i, j);

    tred2(a, d, e);
    tqli(d, e, a);

    Vector<N, T> eigenv;
    Vector<N, Vector<N, T> > eigenvec;
    for (int i=0; i<N; i++) {
      eigenv[i] = d[i+1];
      for (int j=0; j<N; j++)
        eigenvec[j][i] = a[i+1][j+1];   // order should be swapped
    }

    vector<pair<T, zuint> > eigen_value_sort;
    eigen_value_sort.reserve(N);
    for (zuint i = 0; i < N; i++)
      eigen_value_sort.push_back(make_pair(eigenv[i], i));
    sort(eigen_value_sort.rbegin(), eigen_value_sort.rend());
    for (zuint i = 0; i < N; i++) {
      EigenValue[i] = eigen_value_sort[i].first;
      EigenVector.Row(i) = eigenvec[eigen_value_sort[i].second];
    }

  }
};
}